#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @File  : cj.py
# @Author: itlaolang
# @Date  : 2020-02-29 16:23
# @Contact : itlaolang@163.com 
# @Software : PyCharm
# @Desc  : Scrapes rental listings from sh.ganji.com and stores them via MysqlHelper
# coding=utf-8
import requests
from bs4 import BeautifulSoup
import time

# Request headers (see the `headers` dict below the project-local import)
from student.teacher.utitl import MysqlHelper

# HTTP headers passed to the requests.get() calls below; the User-Agent is a
# desktop Chrome string.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/49.0.2623.112 Safari/537.36'}

def _strip_rent_suffix(text):
    """Return a breadcrumb label with the "合租房" / "租房" suffix removed."""
    return text.replace("合租房", "").replace("租房", "")


def _rental_mode(text):
    """Return the first rental-mode keyword contained in *text*, or ''."""
    for mode in ('整租', '合租', '单间'):
        if mode in text:
            return mode
    return ''


def _sql_escape(value):
    """Backslash-escape backslashes and quotes so *value* stays inside its SQL literal.

    NOTE(review): this assumes MySQL's default backslash-escape mode. If
    MysqlHelper can run parameterized queries, prefer those over escaping.
    """
    return str(value).replace('\\', '\\\\').replace("'", "\\'").replace('"', '\\"')


def xq(xqurl):
    """Scrape one listing detail page and store it in the ``gj`` table.

    Downloads *xqurl* and extracts the three breadcrumb regions, title,
    layout, area, price, rental mode, main image URL and community name.
    A row is inserted unless one with the same community name, title and
    area is already present.

    Args:
        xqurl: Absolute URL of the listing detail page.

    Raises:
        requests.exceptions.RequestException: On network failure or timeout.
        AttributeError, IndexError, ValueError: If the page lacks an expected
            element, has too few breadcrumbs, or the price is not an integer.
    """
    print(xqurl)

    # A timeout keeps one stalled connection from blocking the crawl forever.
    response = requests.get(xqurl, headers=headers, timeout=30)
    soup = BeautifulSoup(response.text, "html.parser")

    # Breadcrumb links 2..4 hold the first-, second- and third-level region.
    crumbs = soup.find('div', attrs={'class': 'f-crumbs f-w1190'}).find_all('a')
    onej = _strip_rent_suffix(crumbs[2].text)
    twoj = _strip_rent_suffix(crumbs[3].text)
    therej = _strip_rent_suffix(crumbs[4].text)

    name = soup.find('p', attrs={'class': 'card-title'}).text.replace("\n", "")
    items = soup.find_all("li", attrs={'class': 'item f-fl'})

    hx = items[0].find('span', attrs={"class": "content"}).text
    mj = items[1].find('span', attrs={"class": "content"}).text
    zflx = _rental_mode(mj)
    imgurl = 'http:' + soup.find("div", attrs={'class': 'big-img-wrap'}).find('img').get('src')
    price = soup.find("span", attrs={"class": 'price'}).text
    # The second item is split on two non-breaking spaces; the part after
    # them, minus the "㎡" unit, is stored as the area.
    mj1 = mj.split('\xa0\xa0')[1].replace("㎡", "")

    # The community name is either a link or, failing that, a plain span.
    community = soup.find_all("li", attrs={'class': 'er-item f-fl'})[0]
    community_link = community.find('a')
    if community_link is not None:
        xqname = community_link.text.replace("\n", "")
    else:
        xqname = community.find('span', attrs={"class": 'content'}).text.replace("\n", "")

    mysqlhe = MysqlHelper()
    # Every scraped value is escaped before interpolation: page text is
    # untrusted and a stray quote would otherwise break or alter the query.
    cxsql = 'select * from gj where xqname="{0}" and name="{1}" and mj1="{2}" '.format(
        _sql_escape(xqname), _sql_escape(name), _sql_escape(mj1))
    print(cxsql)
    cxlist = mysqlhe.get_all(cxsql)

    if len(cxlist) == 0:
        sql = "INSERT INTO gj (onej,twoj,therej,xqname,name,hx,mj1,price,zflx,imgurl)VALUES('{0}','{1}','{2}','{3}','{4}','{5}','{6}',{7},'{8}','{9}')".format(
            _sql_escape(onej), _sql_escape(twoj), _sql_escape(therej),
            _sql_escape(xqname), _sql_escape(name), _sql_escape(hx),
            _sql_escape(mj1), int(price), _sql_escape(zflx), _sql_escape(imgurl))
        print(sql)
        mysqlhe.insert(sql)
    else:
        print("已存在")
# Crawl listing pages 3 through 9 and scrape every listing linked from them.
for i in range(3, 10):
    url = 'http://sh.ganji.com/chuzu/pn{0}/'.format(i)
    # A timeout keeps one stalled connection from blocking the crawl forever.
    html = requests.get(url, headers=headers, timeout=30)
    htmltxt = BeautifulSoup(html.text, "html.parser")
    for ii in htmltxt.find_all("div", attrs={'class': 'f-list-item ershoufang-list'}):
        link = ii.find('a')
        href = link.get('href') if link is not None else None
        if not href:
            # Item without a usable link: nothing to scrape.
            continue
        # Links that already start with "https://" are skipped; every other
        # href is completed with the "https:" scheme and scraped.
        # NOTE(review): presumably the absolute links lead to pages with a
        # different layout -- confirm that skipping them is intended.
        if not href.startswith('https://'):
            # Pause between detail requests to throttle the crawl.
            time.sleep(10)
            xq("https:" + href)

